Getting and Preparing the Data

Firstly, I have loaded the necessary packages for visualization and data manipulation that needed for homework tasks.

library(tidyverse)
library(plotly)
library(htmltools)
library(reshape)

Then, the uWave Gesture data have been downloaded from the provided source by the instructor.

download.file("https://drive.google.com/u/1/uc?id=1KDhDT0FE5lkjvn62YTCJ87vZ7A5uS5TT&export=download",
              destfile = "./uWaveGestureLibrary_X_TRAIN.txt")
download.file("https://drive.google.com/u/1/uc?id=1fZCNBdJ40Df5werSu_Ud4GUmCBcBIfaI&export=download",
              destfile = "./uWaveGestureLibrary_Y_TRAIN.txt")
download.file("https://drive.google.com/u/1/uc?id=1jdZ2_NiFil0b4EbLBAfDJ43VQcOgulpf&export=download",
              destfile = "./uWaveGestureLibrary_Z_TRAIN.txt")

Then, the data have been loaded into R using read.table() function, since the data has .txt extension. Training data set of the coordinate data are used for this homework. Their first feature is renamed as Class, because it indicates the class of the gesture.

x <- read.table("uWaveGestureLibrary_X_TRAIN.txt")
y <- read.table("uWaveGestureLibrary_Y_TRAIN.txt")
z<- read.table("uWaveGestureLibrary_Z_TRAIN.txt")

colnames(x)[1] <- "Class"
colnames(y)[1] <- "Class"
colnames(z)[1] <- "Class"

Task A

In this task, I have selected the first observation of each class for 3D visualization. I have achieved the velocity data by summing the acceleration data cumulatively. After obtaining the velocity data, again by summing them cumulatively, I have achieved the position data. The 3D visualizations of the gestures somehow fit to the gesture vocabulary, but it is not perfect. Here is the gesture vocabulary:

class1 <- cbind(x[x$Class == 1, 2:316], y[y$Class == 1, 2:316], z[z$Class == 1, 2:316])
instance_class1 <- class1[1,]
instance_class1 <- data.frame(t(instance_class1[, 1:315]), t(instance_class1[, 316:630]), t(instance_class1[, 631:945]))
instance_class1 <- data.frame(x = cumsum(instance_class1$X11), y = cumsum(instance_class1$X11.1), z = cumsum(instance_class1$X11.2))
instance_class1 <- data.frame(x = cumsum(instance_class1$x), y = cumsum(instance_class1$y), z = cumsum(instance_class1$z))
fig <- plot_ly(instance_class1, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class2 <- cbind(x[x$Class == 2, 2:316], y[y$Class == 2, 2:316], z[z$Class == 2, 2:316])
instance_class2 <- class2[1,]
instance_class2 <- data.frame(t(instance_class2[, 1:315]), t(instance_class2[, 316:630]), t(instance_class2[, 631:945]))
instance_class2 <- data.frame(x = cumsum(instance_class2$X15), y = cumsum(instance_class2$X15.1), z = cumsum(instance_class2$X15.2))
instance_class2 <- data.frame(x = cumsum(instance_class2$x), y = cumsum(instance_class2$y), z = cumsum(instance_class2$z))
fig <- plot_ly(instance_class2, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class3 <- cbind(x[x$Class == 3, 2:316], y[y$Class == 3, 2:316], z[z$Class == 3, 2:316])
instance_class3 <- class3[1,]
instance_class3 <- data.frame(t(instance_class3[, 1:315]), t(instance_class3[, 316:630]), t(instance_class3[, 631:945]))
instance_class3 <- data.frame(x = cumsum(instance_class3$X4), y = cumsum(instance_class3$X4.1), z = cumsum(instance_class3$X4.2))
instance_class3 <- data.frame(x = cumsum(instance_class3$x), y = cumsum(instance_class3$y), z = cumsum(instance_class3$z))
fig <- plot_ly(instance_class3, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class4 <- cbind(x[x$Class == 4, 2:316], y[y$Class == 4, 2:316], z[z$Class == 4, 2:316])
instance_class4 <- class4[1,]
instance_class4 <- data.frame(t(instance_class4[, 1:315]), t(instance_class4[, 316:630]), t(instance_class4[, 631:945]))
instance_class4 <- data.frame(x = cumsum(instance_class4$X5), y = cumsum(instance_class4$X5.1), z = cumsum(instance_class4$X5.2))
instance_class4 <- data.frame(x = cumsum(instance_class4$x), y = cumsum(instance_class4$y), z = cumsum(instance_class4$z))
fig <- plot_ly(instance_class4, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class5 <- cbind(x[x$Class == 5, 2:316], y[y$Class == 5, 2:316], z[z$Class == 5, 2:316])
instance_class5 <- class5[1,]
instance_class5 <- data.frame(t(instance_class5[, 1:315]), t(instance_class5[, 316:630]), t(instance_class5[, 631:945]))
instance_class5 <- data.frame(x = cumsum(instance_class5$X2), y = cumsum(instance_class5$X2.1), z = cumsum(instance_class5$X2.2))
instance_class5 <- data.frame(x = cumsum(instance_class5$x), y = cumsum(instance_class5$y), z = cumsum(instance_class5$z))
fig <- plot_ly(instance_class5, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class6 <- cbind(x[x$Class == 6, 2:316], y[y$Class == 6, 2:316], z[z$Class == 6, 2:316])
instance_class6 <- class6[1,]
instance_class6 <- data.frame(t(instance_class6[, 1:315]), t(instance_class6[, 316:630]), t(instance_class6[, 631:945]))
instance_class6 <- data.frame(x = cumsum(instance_class6$X1), y = cumsum(instance_class6$X1.1), z = cumsum(instance_class6$X1.2))
instance_class6 <- data.frame(x = cumsum(instance_class6$x), y = cumsum(instance_class6$y), z = cumsum(instance_class6$z))
fig <- plot_ly(instance_class6, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class7 <- cbind(x[x$Class == 7, 2:316], y[y$Class == 7, 2:316], z[z$Class == 7, 2:316])
instance_class7 <- class7[1,]
instance_class7 <- data.frame(t(instance_class7[, 1:315]), t(instance_class7[, 316:630]), t(instance_class7[, 631:945]))
instance_class7 <- data.frame(x = cumsum(instance_class7$X7), y = cumsum(instance_class7$X7.1), z = cumsum(instance_class7$X7.2))
instance_class7 <- data.frame(x = cumsum(instance_class7$x), y = cumsum(instance_class7$y), z = cumsum(instance_class7$z))
fig <- plot_ly(instance_class7, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig
class8 <- cbind(x[x$Class == 8, 2:316], y[y$Class == 8, 2:316], z[z$Class == 8, 2:316])
instance_class8 <- class8[1,]
instance_class8 <- data.frame(t(instance_class8[, 1:315]), t(instance_class8[, 316:630]), t(instance_class8[, 631:945]))
instance_class8 <- data.frame(x = cumsum(instance_class8$X6), y = cumsum(instance_class8$X6.1), z = cumsum(instance_class8$X6.2))
instance_class8 <- data.frame(x = cumsum(instance_class8$x), y = cumsum(instance_class8$y), z = cumsum(instance_class8$z))
fig <- plot_ly(instance_class8, x = ~x, y = ~y, z = ~z)
fig <- div(fig, align = "center")
fig

Task B

In this task, data is manipulated into long format, which is achieved by giving an ID to the observations and melting the coordinate data. The final form of the data that I wanted to achieve can be seen in this picture:

As can be seen in the summary of the Principle Component Analysis, we are keeping the almost 50% of the variance in the data set by first principle component of the data. Therefore, I have used the first principle component in order to represent the 3D coordinate data as 1D, and plotted the time series of the arbitrary two instances of each classes. As can be seen in the plots below, the time series are almost identical, therefore PCA can be useful in order to lower the dimension of the data.

x$ID <- c(1:896)
y$ID <- c(1:896)
z$ID <- c(1:896)

x_melt <- melt(x, id = c("ID", "Class"))
y_melt <- melt(y, id = c("ID", "Class"))
z_melt <- melt(z, id = c("ID", "Class"))

all_data <- data.frame(ID = x_melt$ID, Class = x_melt$Class, Variable = x_melt$variable,
                       X = x_melt$value, Y = y_melt$value, Z = z_melt$value)

all_data <- all_data[order(all_data$ID), ]
colnames(all_data)[3] <- "TimeIndex"
all_data$TimeIndex <- rep(1:315, 896)

all_data_pca <- princomp(all_data[, 4:6])
summary(all_data_pca)
## Importance of components:
##                          Comp.1    Comp.2    Comp.3
## Standard deviation     1.211244 1.0182382 0.6975346
## Proportion of Variance 0.490595 0.3467037 0.1627014
## Cumulative Proportion  0.490595 0.8372986 1.0000000
all_data_firstpc <- data.frame(ID = all_data$ID, Class = all_data$Class, TimeIndex = all_data$TimeIndex,
                               FirstPC = all_data_pca$scores[,1])

class1_pc <- data.frame(TimeIndex = c(1:315), Observation11 = all_data_firstpc$FirstPC[all_data$ID == 11],
                        Observation17 = all_data_firstpc$FirstPC[all_data$ID == 17])
g <- ggplot(class1_pc) +
  geom_line(aes(x = TimeIndex, y = Observation11), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation17), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class2_pc <- data.frame(TimeIndex = c(1:315), Observation29 = all_data_firstpc$FirstPC[all_data$ID == 29],
                        Observation31 = all_data_firstpc$FirstPC[all_data$ID == 31])
g <- ggplot(class2_pc) +
  geom_line(aes(x = TimeIndex, y = Observation29), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation31), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class3_pc <- data.frame(TimeIndex = c(1:315), Observation27 = all_data_firstpc$FirstPC[all_data$ID == 27],
                        Observation61 = all_data_firstpc$FirstPC[all_data$ID == 61])
g <- ggplot(class3_pc) +
  geom_line(aes(x = TimeIndex, y = Observation27), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation61), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class4_pc <- data.frame(TimeIndex = c(1:315), Observation5 = all_data_firstpc$FirstPC[all_data$ID == 5],
                        Observation8 = all_data_firstpc$FirstPC[all_data$ID == 8])
g <- ggplot(class4_pc) +
  geom_line(aes(x = TimeIndex, y = Observation5), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation8), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class5_pc <- data.frame(TimeIndex = c(1:315), Observation35 = all_data_firstpc$FirstPC[all_data$ID == 35],
                        Observation41 = all_data_firstpc$FirstPC[all_data$ID == 41])
g <- ggplot(class5_pc) +
  geom_line(aes(x = TimeIndex, y = Observation35), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation41), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class6_pc <- data.frame(TimeIndex = c(1:315), Observation1 = all_data_firstpc$FirstPC[all_data$ID == 1],
                        Observation10 = all_data_firstpc$FirstPC[all_data$ID == 10])
g <- ggplot(class6_pc) +
  geom_line(aes(x = TimeIndex, y = Observation1), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation10), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class7_pc <- data.frame(TimeIndex = c(1:315), Observation7 = all_data_firstpc$FirstPC[all_data$ID == 7],
                        Observation12 = all_data_firstpc$FirstPC[all_data$ID == 12])
g <- ggplot(class7_pc) +
  geom_line(aes(x = TimeIndex, y = Observation7), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation12), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g
class8_pc <- data.frame(TimeIndex = c(1:315), Observation40 = all_data_firstpc$FirstPC[all_data$ID == 40],
                        Observation48 = all_data_firstpc$FirstPC[all_data$ID == 48])
g <- ggplot(class8_pc) +
  geom_line(aes(x = TimeIndex, y = Observation40), color = "red") +
  geom_line(aes(x = TimeIndex, y = Observation48), color = "black") +
  ylab("Observations")
g <- ggplotly(g)
g <- div(g, align = "center")
g

Task C

In this task, I have applied the PCA to the each class separately, to see whether first components has similar variance percentage explained. According to the PCA results, variance captured by the first component is bigger than the first gesture. Therefore it will be useful to use PCA approach separately on the different classes. We can capture most variance in the class 5 and class 8, PCA performs best in these classes.

split_data <- split(all_data, all_data$Class)
split_data_pca <- map(split_data, ~princomp(.[,4:6]))
map(split_data_pca, summary)
## $`1`
## Importance of components:
##                           Comp.1    Comp.2    Comp.3
## Standard deviation     1.1760927 0.9830166 0.8006000
## Proportion of Variance 0.4625331 0.3231331 0.2143339
## Cumulative Proportion  0.4625331 0.7856661 1.0000000
## 
## $`2`
## Importance of components:
##                           Comp.1    Comp.2    Comp.3
## Standard deviation     1.2380477 0.9667324 0.7232858
## Proportion of Variance 0.5125479 0.3125160 0.1749361
## Cumulative Proportion  0.5125479 0.8250639 1.0000000
## 
## $`3`
## Importance of components:
##                          Comp.1    Comp.2    Comp.3
## Standard deviation     1.271645 0.9460372 0.6916702
## Proportion of Variance 0.540744 0.2992789 0.1599771
## Cumulative Proportion  0.540744 0.8400229 1.0000000
## 
## $`4`
## Importance of components:
##                           Comp.1    Comp.2    Comp.3
## Standard deviation     1.2826327 0.9590307 0.6523722
## Proportion of Variance 0.5501287 0.3075563 0.1423150
## Cumulative Proportion  0.5501287 0.8576850 1.0000000
## 
## $`5`
## Importance of components:
##                           Comp.1    Comp.2     Comp.3
## Standard deviation     1.3912247 0.9062088 0.48348277
## Proportion of Variance 0.6472234 0.2746099 0.07816668
## Cumulative Proportion  0.6472234 0.9218333 1.00000000
## 
## $`6`
## Importance of components:
##                           Comp.1    Comp.2     Comp.3
## Standard deviation     1.3076795 0.9925902 0.54333711
## Proportion of Variance 0.5718239 0.3294577 0.09871846
## Cumulative Proportion  0.5718239 0.9012815 1.00000000
## 
## $`7`
## Importance of components:
##                           Comp.1    Comp.2    Comp.3
## Standard deviation     1.2460598 1.0128304 0.6418611
## Proportion of Variance 0.5192033 0.3430308 0.1377659
## Cumulative Proportion  0.5192033 0.8622341 1.0000000
## 
## $`8`
## Importance of components:
##                           Comp.1    Comp.2     Comp.3
## Standard deviation     1.3544144 0.9660374 0.47202702
## Proportion of Variance 0.6134269 0.3120668 0.07450636
## Cumulative Proportion  0.6134269 0.9254936 1.00000000

Task D

In this part, by using euclidean distance, classical multidimensional scaling is applied to the gesture data. The dimension has been lowered to 2D. We can somehow classify the gestures with the 2D representation of it. Different classes are grouped together. The gestures with the opposite moves can be seen distinctly in the classification plot.

x_distance <- dist(x[, 2:316], method = "euclidean")
y_distance <- dist(y[, 2:316], method = "euclidean")
z_distance <- dist(z[, 2:316], method = "euclidean")

total_distance <- x_distance + y_distance + z_distance
mds2d <- cmdscale(total_distance, k = 2)
scaled_data <- as.data.frame(mds2d)
scaled_data$Class <- x$Class

g <- ggplot(scaled_data, aes(x = V1, y = V2)) +
  geom_point(aes(color = factor(Class)))
g <- ggplotly(g)
g <- div(g, align = "center")
g